import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# %matplotlib inline  (Jupyter cell magic — not valid Python in a plain .py script)
# Twenty random 2-D points drawn from a standard normal (mean 0, std 1).
x = np.random.normal(size=(20, 2))
# Class labels: ten -1s followed by ten +1s.
y = np.array([-1] * 10 + [1] * 10)
# Shift the +1 class by one unit in each coordinate so the two classes
# become roughly (but not perfectly) separable.
x[y == 1, :] += 1
# Scatter the raw points coloured by class; c=y+3 maps labels -1/+1 to
# the distinct colormap values 2 and 4 so the classes get different colours.
plt.figure(figsize = (10,5))
plt.scatter(x[:, 0], x[:, 1], c=y + 3, edgecolors='black', marker='o', s=100)
# <matplotlib.collections.PathCollection at 0x1fc8825b880>  (pasted notebook output)
from sklearn.svm import SVC
# Gather the features and labels in a DataFrame for convenience.
dat = pd.DataFrame({'x1': x[:, 0], 'x2': x[:, 1], 'y': y})
# Linear-kernel SVM with a fairly hard margin (large C).
# Fix: the original also passed gamma='scale', but gamma only applies to
# the 'rbf', 'poly' and 'sigmoid' kernels and is ignored for 'linear';
# it is dropped here so the call does not imply gamma has an effect.
svmfit = SVC(kernel='linear', C=10)
svmfit.fit(dat[['x1', 'x2']].values, dat['y'].values)
print(svmfit)
# SVC(C=10, kernel='linear')  (pasted notebook output)
h = 0.02  # step size in the mesh
# Pad the data range by 1 on every side so the contour fills the figure.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
# Build the evaluation grid.
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Classify every grid point, then shape the predictions like the grid.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = svmfit.predict(grid_points).reshape(xx.shape)
# Decision regions as filled contours, training points drawn on top.
plt.figure(figsize=(10, 5))
plt.contourf(xx, yy, Z, alpha=0.6)
plt.scatter(x[:, 0], x[:, 1], c=y + 3, s=100, edgecolors='k')
plt.xlabel('x1')
plt.ylabel('x2')
plt.title('SVM Decision Boundary')
plt.show()
h = 0.02  # step size in the mesh
# Same padded grid as before.
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the class of every grid point and reshape to the grid.
Z = svmfit.predict(np.column_stack((xx.ravel(), yy.ravel()))).reshape(xx.shape)
# Decision regions plus the training points.
plt.figure(figsize=(10, 5))
plt.contourf(xx, yy, Z, alpha=0.6)
plt.scatter(x[:, 0], x[:, 1], c=y + 3, s=100, edgecolors='k')
plt.xlabel('x1')
plt.ylabel('x2')
plt.title('SVM Decision Boundary')
# Ring the support vectors in red so they stand out.
sv = svmfit.support_vectors_
plt.scatter(sv[:, 0], sv[:, 1], s=100, marker='o',
            facecolors='none', edgecolors='red', linewidths=2)
plt.show()
# obtained from: https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
# Changed edgecolor to black, in contrast with the website version, because the markers were invisible otherwise
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a 2D SVC.

    Draws the decision boundary (level 0, solid) and the two margins
    (levels -1 and +1, dashed) of *model* on *ax* (current axes if None),
    optionally circling the support vectors.
    """
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    # Evaluate the decision function on a 30x30 grid over the current axes.
    gx = np.linspace(xlim[0], xlim[1], 30)
    gy = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(gy, gx)
    grid = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(grid).reshape(X.shape)
    # Boundary (solid) and margins (dashed).
    ax.contour(X, Y, P, colors='k', levels=[-1, 0, 1],
               alpha=0.5, linestyles=['--', '-', '--'])
    if plot_support:
        sv = model.support_vectors_
        ax.scatter(sv[:, 0], sv[:, 1], s=300, linewidth=1,
                   facecolors='none', edgecolor='black')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
# Show the training data coloured by class, then overlay the fitted linear
# SVM's boundary, margins and support vectors on the same axes.
plt.scatter(dat['x1'], dat['x2'], c=dat['y'], s=50, cmap='autumn')
plot_svc_decision_function(svmfit);
np.random.seed(42)  # reproducible draw
# 200 standard-normal 2-D points: shift the first 100 up-right and the
# next 50 down-left, leaving three clusters (a non-linear class layout).
x = np.random.normal(size=(200, 2))
x[:100, :] = x[:100, :] + 2.5
x[100:150, :] = x[100:150, :] - 2.5
# First 150 points are class 1, the remaining 50 are class 2.
y = np.array([1] * 150 + [2] * 50)
dat = pd.DataFrame(x, columns=['X1', 'X2'])
dat["y"] = y
# Categorical dtype so plotting libraries treat the label as discrete.
dat["y"] = dat["y"].astype("category")
import plotly.express as px
# Interactive scatter of the three-cluster data, coloured/marked by class.
# NOTE(review): X2 is on the horizontal axis and X1 on the vertical —
# confirm the axis swap is intentional.
fig = px.scatter(dat, x="X2", y="X1", color = "y", symbol = "y")
fig  # bare expression: renders the figure in a notebook; no effect in a script
# Remap the class labels {1, 2} -> {0, 1}; the decision-region plot below
# expects zero-based labels.
y -= 1
y  # bare expression: shows the array in a notebook; no effect in a script
# (pasted notebook output) the relabelled y vector:
# array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
#        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
#        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
#        1, 1])
# RBF-kernel SVM on the three-cluster data; C=1, gamma=1 as in the ISLR lab.
model = SVC(kernel='rbf', C=1, gamma=1)
model.fit(x,y)
# (pasted notebook output)
# SVC(C=1, gamma=1)  — in a Jupyter environment, rerun the cell to show the HTML representation.
# SVC(C=1, gamma=1)
from mlxtend.plotting import plot_decision_regions
# Draw the RBF SVM's decision regions over the data (legend placed at loc=2).
plot_decision_regions(x, y, clf=model, legend=2)
# <AxesSubplot:>  (pasted notebook output)